Map of Cases and Deaths in Indiana

Normalized by county population.

  1. Import data
  2. Load county GeoJSON boundaries (matched on FIPS code)
  3. Add population info
  4. Plot some maps
In [1]:
from datetime import datetime, timedelta
import math
import os
import time
import json


from plotly.offline import init_notebook_mode, iplot
from bokeh.io import output_notebook
from bokeh.models import FuncTickFormatter, ColumnDataSource
from bokeh.plotting import figure, output_file, show
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

# bokeh: configure for notebook so bokeh figures render inline in cell output
# (this call is what prints the "Loading BokehJS ..." banner below the cell).
# https://docs.bokeh.org/en/latest/docs/user_guide/jupyter.html#userguide-jupyter-notebook
# NOTE(review): the cells visible here only draw plotly maps - confirm bokeh is
# still used further down before keeping this setup.
output_notebook()

# load data: check if we have a fresh local version (8 hours ago or newer)
#           if we don't have a fresh version, pull down a remote csv
def download_data_source(local_path, timeout=30):
    """Download the latest NYT US-counties CSV and save it to 'local_path'.

    The response body is streamed into a temporary ``<local_path>.part`` file
    and only moved over ``local_path`` once the transfer has completed. An
    interrupted download therefore never leaves a truncated file behind that
    a later modification-time check could mistake for fresh data.

    Args:
        local_path: Destination path of the CSV file.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would hang the notebook.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or a 4xx/5xx response (via ``raise_for_status``).
        OSError: If the destination cannot be written or replaced.
    """
    import requests  # lazy import: only needed when a download actually happens

    data_source = (
        "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
    )
    partial_path = f"{local_path}.part"
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(data_source, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(partial_path, "wb") as openfile:
                for block in resp.iter_content(1024):
                    openfile.write(block)
        # Atomic swap: readers see either the old file or the complete new one.
        os.replace(partial_path, local_path)
    finally:
        # After a successful replace the partial file no longer exists; this
        # only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)


# Cached CSV is re-downloaded once it is at least this many hours old.
MAX_AGE_HOURS = 8

local_path = os.path.join(".", "us-counties.csv")
if os.path.isfile(local_path):
    mtime = int(os.stat(local_path).st_mtime)
    now = int(time.time())
    age = (now - mtime) / 60 / 60  # seconds -> hours
    if age >= MAX_AGE_HOURS:
        print("Local Data: is stale - downloading")
        download_data_source(local_path)
else:
    print("Local Data: not found - downloading")
    download_data_source(local_path)

print("Local Data: loading from file")
# fips is read as a string so leading zeros in county codes are not lost.
df = pd.read_csv(local_path, dtype={"fips": str})
# Keep only Indiana rows with a known county. ``.copy()`` makes the result an
# independent frame, so adding columns to it later does not operate on a slice
# of the full CSV (which pandas reports as SettingWithCopyWarning).
df = df[(df["state"] == "Indiana") & (df["county"] != "Unknown")].copy()
df.head()
Loading BokehJS ...
Local Data: loading from file
Out[1]:
date county state fips cases deaths
613 2020-03-06 Marion Indiana 18097 1 0
697 2020-03-07 Marion Indiana 18097 1 0
794 2020-03-08 Hendricks Indiana 18063 1 0
795 2020-03-08 Marion Indiana 18097 1 0
914 2020-03-09 Hendricks Indiana 18063 2 0
In [2]:
# download geojson
def download_geojson(local_path, timeout=30):
    """Download the plotly US-counties GeoJSON and save it to 'local_path'.

    The response body is streamed into a temporary ``<local_path>.part`` file
    and only moved over ``local_path`` once the transfer has completed, so an
    interrupted download never leaves a truncated (unparseable) JSON file that
    the "file already exists" check would keep reusing.

    Args:
        local_path: Destination path of the GeoJSON file.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or a 4xx/5xx response (via ``raise_for_status``).
        OSError: If the destination cannot be written or replaced.
    """
    import requests  # lazy import: only needed when a download actually happens

    data_source = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
    partial_path = f"{local_path}.part"
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(data_source, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            with open(partial_path, "wb") as openfile:
                for block in resp.iter_content(1024):
                    openfile.write(block)
        # Atomic swap: readers see either no file or the complete one.
        os.replace(partial_path, local_path)
    finally:
        # Only present if the download failed part-way; remove the leftovers.
        if os.path.exists(partial_path):
            os.remove(partial_path)


geojson_local_path = os.path.join(".", "geojson-counties-fips.json")

# Fetch the county boundaries only when no local copy exists; unlike the case
# data there is no age check, so an existing file is always reused.
geojson_missing = not os.path.isfile(geojson_local_path)
if geojson_missing:
    print("Geojson: downloading")
    download_geojson(geojson_local_path)

# Parse the cached file into a plain Python structure.
with open(geojson_local_path, "r") as geojson_file:
    geojson = json.load(geojson_file)
print("Geojson: loaded data")


# Show just the first 100 characters of the pretty-printed document as a
# sanity check that the file parsed.
geojson_preview = json.dumps(geojson, indent=2, sort_keys=True)[:100]
print(geojson_preview)
Geojson: loaded data
{
  "features": [
    {
      "geometry": {
        "coordinates": [
          [
            [
     
In [3]:
# import county population data
with open("in-county-populations--modified.json", "r") as openfile:
    in_county_data = json.load(openfile)


# Population of the county on each row. The lookup is a plain dict access, so
# a county missing from the population file still fails loudly with KeyError.
county_population = df["county"].map(lambda county: in_county_data[county]["Pop"])

# Column arithmetic replaces the row-by-row ``apply(axis=1)``: same values,
# computed in one vectorized pass. ``assign`` returns a new frame instead of
# writing into ``df`` in place, so no assignment happens on a filtered slice.
df = df.assign(
    population=county_population,
    cases_pop=df["cases"] / county_population * 100,  # percent of population
    deaths_pop=df["deaths"] / county_population * 100,  # percent of population
)
df.head()
Out[3]:
date county state fips cases deaths population cases_pop deaths_pop
613 2020-03-06 Marion Indiana 18097 1 0 954670 0.000105 0.0
697 2020-03-07 Marion Indiana 18097 1 0 954670 0.000105 0.0
794 2020-03-08 Hendricks Indiana 18063 1 0 167009 0.000599 0.0
795 2020-03-08 Marion Indiana 18097 1 0 954670 0.000105 0.0
914 2020-03-09 Hendricks Indiana 18063 2 0 167009 0.001198 0.0
In [4]:
# Shared settings for the choropleth maps below.
# `in_center` is the point the map view is centred on.
in_center = dict(lat=39.766028, lon=-86.441278)

# Keyword arguments common to every map; each map cell unpacks these and adds
# its own colour column, colour range, labels and hover data.
default_cloropleth_kwargs = {
    "geojson": geojson,
    "locations": "fips",  # dataframe column matched against the geojson features
    "color_continuous_scale": "Plasma",
    "mapbox_style": "carto-positron",
    "zoom": 5.4,
    "center": in_center,
    "opacity": 0.5,
}
In [5]:
# Map: cases as a percentage of county population.
# The colour scale spans the smallest to the largest percentage in the frame.
cases_pop_values = df["cases_pop"].unique()
_min, _max = min(cases_pop_values), max(cases_pop_values)

fig = px.choropleth_mapbox(
    data_frame=df,
    color="cases_pop",
    range_color=(_min, _max),
    hover_data=["county", "cases"],
    labels={"cases_pop": "Percent Infected"},
    **default_cloropleth_kwargs
)
# Drop all figure margins so the map fills the output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
In [6]:
# Map: raw case counts per county.
# The colour scale spans the smallest to the largest count in the frame.
cases_values = df["cases"].unique()
_min, _max = min(cases_values), max(cases_values)

fig = px.choropleth_mapbox(
    data_frame=df,
    color="cases",
    range_color=(_min, _max),
    hover_data=["county", "cases"],
    labels={"cases": "Number of Cases"},
    **default_cloropleth_kwargs
)
# Drop all figure margins so the map fills the output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()